### Measuring Political Attitudes with Word Association - Diagnostics ###
## Ze Han, Ph.D. Student, Princeton University Department of Politics, zeh@princeton.edu ##
## Naijia Liu, Assistant Professor, Harvard University Department of Government, naijialiu@fas.harvard.edu ##
## Rory Truex, Associate Professor, Princeton University Department of Politics and School of Public and International Affairs, rtruex@princeton.edu ##

# NOTE(review): hard-coded, machine-specific working directory. Every relative
# path used below ("./data/...", "./figures/...") is resolved against it.
setwd("~/Desktop/WAT_PoQ/code")

## Load Packages and Functions ##
# Remove every object from the current environment (all = TRUE also removes
# names beginning with a dot) so the script starts from a clean workspace.
rm(list=ls(all=TRUE))

library(grid)
library(tidyverse)
library(lfe)
library(lmtest)
library(sandwich)
library(stargazer)

## Arrange Functions ##
# Build a viewport that targets a single cell (row `x`, column `y`) of the
# grid layout pushed by arrange_ggplot2().
vp.layout <- function(x, y) viewport(layout.pos.row=x, layout.pos.col=y)

# Print several plot objects on one fresh grid page, filling an
# nrow-by-ncol layout row by row (left to right within a row).
#
# Args:
#   ...:      plot objects; each one is drawn with print(obj, vp = <viewport>).
#   nrow:     number of layout rows. When NULL it is derived from `ncol`; when
#             both are NULL it defaults to floor(n / 2), but never less than 1.
#   ncol:     number of layout columns. When NULL it is derived from `nrow`.
#   as.table: if TRUE the row order is reversed, i.e. the first plots are
#             placed in the bottom row; if FALSE (default) rows are filled
#             from top to bottom.
#
# Returns:
#   NULL, invisibly. The function is called for its drawing side effect.
arrange_ggplot2 <- function(..., nrow=NULL, ncol=NULL, as.table=FALSE) {
  dots <- list(...)
  n <- length(dots)

  # Scalar conditions use `&&`. The max(1, .) guards keep the layout at least
  # 1 x 1: previously a single plot with no dimensions gave nrow = 0 and
  # ncol = Inf, which made grid.layout() fail.
  if (is.null(nrow) && is.null(ncol)) {
    nrow <- max(1, floor(n / 2))
    ncol <- max(1, ceiling(n / nrow))
  }
  if (is.null(nrow)) {
    nrow <- max(1, ceiling(n / ncol))
  }
  if (is.null(ncol)) {
    ncol <- max(1, ceiling(n / nrow))
  }

  grid.newpage()
  pushViewport(viewport(layout = grid.layout(nrow, ncol)))

  ii.p <- 1
  for (ii.row in seq_len(nrow)) {
    # Row of the layout this pass draws into (flipped when as.table = TRUE)
    ii.table.row <- ii.row
    if (as.table) {
      ii.table.row <- nrow - ii.table.row + 1
    }
    for (ii.col in seq_len(ncol)) {
      # Stop filling this row once every plot has been printed
      if (ii.p > n) break
      print(dots[[ii.p]], vp = vp.layout(ii.table.row, ii.col))
      ii.p <- ii.p + 1
    }
  }
  invisible(NULL)
}

## Load Datasets ##
# Study 1 data, Study 2 (Hong Kong) data, and the cue-word key.
# Strings are kept as character vectors rather than factors.
data.wat <- read.csv(
  file = "./data/data.wat.csv",
  stringsAsFactors = FALSE
)
data.hk.wat <- read.csv(
  file = "./data/data.hk.wat.csv",
  stringsAsFactors = FALSE
)
wordkey <- read.csv(
  file = "./data/wordkey.csv",
  stringsAsFactors = FALSE
)

## Individual-Level Demographic Statistics ##
# Keep the respondent ID plus demographic columns and drop duplicated rows
data.wat.desc <- distinct(
  select(
    data.wat,
    ResponseID, female, age, res.village, hukou.agr, minority, lowed, ccp
  )
)

# Table SI1
stargazer(
  data.wat.desc,
  covariate.labels = c(
    "Female",
    "Age",
    "Rural Residence",
    "Agricultural Hukou",
    "Ethnic Minority",
    "Low Levels of Education",
    "CCP Membership"
  )
)

# Same reduction for the Hong Kong study, which uses fewer demographic columns
data.hk.wat.desc <- distinct(
  select(data.hk.wat, ResponseID, female, age, lowed)
)

# Table SI2
stargazer(
  data.hk.wat.desc,
  covariate.labels = c(
    "Female",
    "Age",
    "Low Levels of Education"
  )
)

## WAT Performance Diagnostics ##

# Figure 1: Histogram of Submission Latency for All Trials
# Two panels (Study 1 on the left, Study 2 on the right) in a single PDF.
pdf("./figures/fig-histogram-latencysubmission.pdf", width = 8, height = 4)

# Study 1 panel
p1 <- ggplot() +
  geom_histogram(
    mapping = aes(x = latency.submission),
    data = data.wat,
    size = .5,
    alpha = .25,
    bins = 20
  ) +
  ggtitle("Study 1 - Mainland China") +
  theme_classic() +
  theme(axis.title.x = element_text(size = 12, vjust = .25)) +
  xlab("Latency - Trial Submission (seconds)") +
  ylab("Count") +
  xlim(c(0, 21)) +
  # dashed reference line at 20 seconds
  geom_vline(xintercept = 20, linetype = "dashed", size = .3) +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Study 2 panel: identical styling, Hong Kong data
p2 <- ggplot() +
  geom_histogram(
    mapping = aes(x = latency.submission),
    data = data.hk.wat,
    size = .5,
    alpha = .25,
    bins = 20
  ) +
  ggtitle("Study 2 - Hong Kong") +
  theme_classic() +
  theme(axis.title.x = element_text(size = 12, vjust = .25)) +
  xlab("Latency - Trial Submission (seconds)") +
  ylab("Count") +
  xlim(c(0, 21)) +
  geom_vline(xintercept = 20, linetype = "dashed", size = .3) +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

arrange_ggplot2(p1, p2, nrow = 1, ncol = 2)
dev.off()

# Time to submission and the number of response words
# Two 0/1 flags per trial: submitted after more than 19.5 seconds, and
# submitted in under 10 seconds.
data.wat <- mutate(
  data.wat,
  latency.submission.20 = ifelse(latency.submission > 19.5, 1, 0),
  latency.submission.10 = ifelse(latency.submission < 10, 1, 0)
)

# Mean token count by each flag (Study 1)
summarise(
  group_by(data.wat, latency.submission.20),
  mean_tokens = mean(tokens.num)
) # 2.76 tokens

summarise(
  group_by(data.wat, latency.submission.10),
  mean_tokens = mean(tokens.num)
) # 0.90 tokens

# Same flags for the Hong Kong study
data.hk.wat <- mutate(
  data.hk.wat,
  latency.submission.20 = ifelse(latency.submission > 19.5, 1, 0),
  latency.submission.10 = ifelse(latency.submission < 10, 1, 0)
)

# Mean token count by each flag (Study 2)
summarise(
  group_by(data.hk.wat, latency.submission.20),
  mean_tokens = mean(tokens.num)
) # 2.55 tokens

summarise(
  group_by(data.hk.wat, latency.submission.10),
  mean_tokens = mean(tokens.num)
) # 1.17 tokens

# Denote the core cues
# core = 1 when the English cue term is one of the four listed terms, else 0
data.wat <- mutate(
  data.wat,
  core = ifelse(
    term.eng %in% c("CCP", "China", "democracy", "central government"),
    1,
    0
  )
)

data.hk.wat <- mutate(
  data.hk.wat,
  core = ifelse(
    term.eng %in% c("CCP", "China", "democracy", "central government"),
    1,
    0
  )
)

# Analysis: Identifying Matching Behavior
# A trial counts as "matching" when the response equals the cue itself.
# The cue "共产党" is first recoded to "中共" before the comparison.
data.wat <- mutate(
  data.wat,
  term.chi2 = ifelse(term.chi == "共产党", "中共", term.chi),
  matching = ifelse(response == term.chi2, 1, 0)
)

summary(data.wat[["matching"]]) # Roughly 2.065% of trials engaged in matching behavior

# Hong Kong: same rule, applied to the traditional-character cue column
data.hk.wat <- mutate(
  data.hk.wat,
  term.chi2 = ifelse(term.traditional == "共產黨", "中共", term.traditional),
  matching = ifelse(response == term.chi2, 1, 0)
)

summary(data.hk.wat[["matching"]]) # Roughly 5.425% of trials engaged in matching behavior

# Analysis: Identifying Clickthrough Behavior (Mainland China)
# Trial-level flag: 1 when the response is the empty string
data.wat <- mutate(data.wat, nonresponse = ifelse(response == "", 1, 0))

# Respondent-level count of blank trials; a respondent is flagged as
# clickthrough when more than 8 trials are blank
data.resp <- summarise(
  group_by(data.wat, ResponseID),
  resp.nonresponse = sum(nonresponse, na.rm = TRUE)
)
data.resp <- mutate(
  data.resp,
  clickthrough = ifelse(resp.nonresponse > 8, 1, 0)
)

summary(data.resp[["clickthrough"]]) # 20.4% of people are not responding to more than 50% of questions

# Frequency table of blank-trial totals, labelled Valid / Invalid for plotting
data.bar.nonresponse <- count(data.resp, resp.nonresponse)
data.bar.nonresponse <- rename(
  data.bar.nonresponse,
  nonresponse.total = resp.nonresponse,
  frequency = n
)
data.bar.nonresponse <- mutate(
  data.bar.nonresponse,
  nonresponse.total = as.factor(nonresponse.total),
  group = ifelse(nonresponse.total %in% as.factor(9:18), "Invalid", "Valid")
)

# Attach the respondent-level columns to every trial
data.wat <- left_join(data.wat, data.resp, by = "ResponseID")

# Filtered sample: drop clickthrough respondents and matching trials
data.wat.filt <- filter(data.wat, clickthrough != 1, matching != 1)

# Analysis: Identifying Clickthrough Behavior (Hong Kong)
# Trial-level flag: 1 when the response is the empty string
data.hk.wat <- mutate(data.hk.wat, nonresponse = ifelse(response == "", 1, 0))

# Respondent-level count of blank trials; a respondent is flagged as
# clickthrough when more than 8 trials are blank
data.hk.resp <- summarise(
  group_by(data.hk.wat, ResponseID),
  resp.nonresponse = sum(nonresponse, na.rm = TRUE)
)
data.hk.resp <- mutate(
  data.hk.resp,
  clickthrough = ifelse(resp.nonresponse > 8, 1, 0)
)

summary(data.hk.resp[["clickthrough"]]) # 5.4% of people are not responding to more than 50% of questions

# Frequency table of blank-trial totals, labelled Valid / Invalid for plotting
data.hk.bar.nonresponse <- count(data.hk.resp, resp.nonresponse)
data.hk.bar.nonresponse <- rename(
  data.hk.bar.nonresponse,
  nonresponse.total = resp.nonresponse,
  frequency = n
)
data.hk.bar.nonresponse <- mutate(
  data.hk.bar.nonresponse,
  nonresponse.total = as.factor(nonresponse.total),
  group = ifelse(nonresponse.total %in% as.factor(9:18), "Invalid", "Valid")
)

# Attach the respondent-level columns to every trial
data.hk.wat <- left_join(data.hk.wat, data.hk.resp, by = "ResponseID")

# Filtered sample: drop clickthrough respondents and matching trials
data.hk.wat.filt <- filter(data.hk.wat, clickthrough != 1, matching != 1)

# Figure 2: Nonresponse Histogram
# Side-by-side bar charts (Study 1 left, Study 2 right) of how many
# respondents left a given number of trials blank. Bars are filled by the
# Valid / Invalid `group` label and a dashed vertical line is drawn at
# x = 9.5 on the discrete axis.
# NOTE(review): the "n=..., ...%" annotations are hard-coded strings; they do
# not update if the underlying data change.
pdf('./figures/fig-nonresponse-histogram-sum.pdf', width=8, height=4)
p1 <- ggplot(data.bar.nonresponse, aes(x=nonresponse.total, y=frequency, fill=group)) + 
  ggtitle("Study 1 - Mainland China") + geom_col(alpha=.5,width=1) +theme_classic() + 
  # axis label previously misspelled as "Total Nonreponses"
  xlab("Total Nonresponses") + ylab("Count")  +  
  geom_vline(xintercept = 9.5, size=.5, color="grey50",lty="dashed") + 
  annotate("text", x=14.5, y = 250, label = "Invalid Responses",color="grey20", size=2.9)  + 
  annotate("text", x=14.5, y = 200, label = "n=243, 20.4%",color="grey20", size=2.8) + 
  # manual colours are applied to the fill aesthetic; the legend is suppressed
  scale_color_manual(values=c("dodgerblue3","grey60"),guide="none", aesthetics = c("fill")) + 
  theme(plot.title = element_text(size = 9), axis.title = element_text(size = 9)) 
p2 <- ggplot(data.hk.bar.nonresponse, aes(x=nonresponse.total, y=frequency, fill=group)) + 
  ggtitle("Study 2 - Hong Kong") + geom_col(alpha=.5,width=1) +theme_classic() + 
  # axis label previously misspelled as "Total Nonreponses"
  xlab("Total Nonresponses") + ylab("Count")  +  
  geom_vline(xintercept = 9.5, size=.5, color="grey50",lty="dashed") + 
  annotate("text", x=14.5, y = 250, label = "Invalid Responses",color="grey20", size=2.9)  + 
  annotate("text", x=14.5, y = 200, label = "n=55, 5.4%",color="grey20", size=2.8) + 
  scale_color_manual(values=c("dodgerblue3","grey60"),guide="none", aesthetics = c("fill")) + 
  theme(plot.title = element_text(size = 9), axis.title = element_text(size = 9)) 
arrange_ggplot2(p1, p2, nrow=1, ncol=2)
dev.off()

# Figure 4: Nonresponse Rates for All Cue Words (Mainland China) - Filtered
# Recompute the blank-response flag on the filtered trials
data.wat.filt <- mutate(
  data.wat.filt,
  nonresponse = ifelse(response == "", 1, 0)
)

# Number of filtered trials per cue word
data.trials <- summarize(
  group_by(data.wat.filt, term.eng),
  trials = n(),
  .groups = "drop"
)

# Per-cue nonresponse rate, joined to the trial counts and the word key
data.nonresponse <- summarize(
  group_by(data.wat.filt, term.eng),
  nonresponse = mean(nonresponse, na.rm = TRUE),
  .groups = "drop"
)
data.nonresponse <- inner_join(data.nonresponse, data.trials, by = "term.eng")
data.nonresponse <- inner_join(data.nonresponse, wordkey, by = "term.eng")

# Standard error of a proportion and the +/- 1.96 SE interval bounds
data.nonresponse <- mutate(
  data.nonresponse,
  nonresponse.se = sqrt(abs(nonresponse * (1 - nonresponse)) / trials),
  nonresponse.u95ci = nonresponse + (1.96 * nonresponse.se),
  nonresponse.l95ci = nonresponse - (1.96 * nonresponse.se)
)

# Dot-and-interval plot, terms ordered by nonresponse rate.
# NOTE(review): `core` is not created in this block - presumably it is a
# column of wordkey; confirm.
pdf("./figures/fig-nonresponserates-filt.pdf", width = 7.5, height = 10.5)
ggplot(
  data.nonresponse,
  aes(x = nonresponse, y = reorder(term.eng, nonresponse), color = core)
) +
  xlab("Nonresponse Rate") +
  ylab("Term") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5) +
  geom_segment(
    aes(
      y = reorder(term.eng, nonresponse),
      x = nonresponse.l95ci,
      xend = nonresponse.u95ci,
      yend = reorder(term.eng, nonresponse)
    ),
    alpha = .4,
    lwd = .8
  ) +
  theme(legend.position = "none") +
  scale_color_manual(
    values = c("dodgerblue3", "grey75"),
    guide = "none",
    aesthetics = c("fill", "color")
  )
dev.off()

# Figure SI4: Nonresponse Rates for All Cue Words (Hong Kong) - Filtered
# Recompute the blank-response flag on the filtered Hong Kong trials
data.hk.wat.filt <- data.hk.wat.filt %>%
  mutate(nonresponse = ifelse(response == "", 1, 0))

# Number of filtered Hong Kong trials per cue word
data.hk.trials <- data.hk.wat.filt %>%
  group_by(term.eng) %>%
  summarize(trials = n(), .groups = 'drop')

# Per-cue nonresponse rate with its standard error and +/- 1.96 SE bounds.
# The trial counts must come from the Hong Kong sample (data.hk.trials);
# joining the Mainland counts (data.trials) would give the wrong denominator
# for every standard error and drop any cue absent from the Mainland study.
data.hk.nonresponse <- data.hk.wat.filt %>%
  group_by(term.eng) %>%
  summarize(nonresponse = mean(nonresponse, na.rm = TRUE), .groups = 'drop') %>%
  inner_join(data.hk.trials, by = "term.eng") %>%
  inner_join(wordkey, by = "term.eng") %>% 
  mutate(
    nonresponse.se = sqrt(abs(nonresponse * (1 - nonresponse)) / trials),
    nonresponse.u95ci = nonresponse + (1.96 * nonresponse.se),
    nonresponse.l95ci = nonresponse - (1.96 * nonresponse.se)
  )

# Dot-and-interval plot, terms ordered by nonresponse rate.
# NOTE(review): `core` is not created in this block - presumably it is a
# column of wordkey; confirm.
pdf('./figures/fig-nonresponserates-hk-filt.pdf', width=7.5, height=10.5)
ggplot(data.hk.nonresponse, aes(x=nonresponse, y=reorder(term.eng,nonresponse), color=core)) + 
  xlab("Nonresponse Rate") + ylab("Term") + 
  theme(plot.title = element_text(lineheight=.8, face="bold")) + scale_shape(solid = TRUE) + theme_classic() + 
  theme(legend.title=element_blank()) + geom_point(alpha=.5, size=2.5)  + 
  geom_segment(aes(y = reorder(term.eng,nonresponse), x = nonresponse.l95ci, xend = nonresponse.u95ci, yend = reorder(term.eng,nonresponse)), alpha=.4 , lwd=.8) + 
  theme(legend.position = "none")  + 
  scale_color_manual(values=c("dodgerblue3","grey75"),guide="none", aesthetics = c("fill","color"))
dev.off() 

# Figure SI2: Mean Latency to First Click for All Cue Words (Mainland China) - Filtered
# Per-cue mean of latency.firstclick
data.latency.firstclick <- summarise(
  group_by(data.wat.filt, term.eng),
  latency.firstclick = mean(latency.firstclick, na.rm = TRUE)
)

# Per-cue standard deviation, computed separately from the mean
data.latency.firstclick.sd <- summarise(
  group_by(data.wat.filt, term.eng),
  latency.firstclick.sd = sd(latency.firstclick, na.rm = TRUE)
)

data.latency.firstclick <- left_join(
  data.latency.firstclick,
  data.latency.firstclick.sd,
  by = "term.eng"
)

# Add the latency columns to the per-cue table; SE = sd / sqrt(trials),
# interval bounds = mean +/- 1.96 SE
data.nonresponse <- mutate(
  left_join(data.nonresponse, data.latency.firstclick, by = "term.eng"),
  latency.firstclick.se = latency.firstclick.sd / sqrt(trials),
  latency.firstclick.u95ci = latency.firstclick + (1.96 * latency.firstclick.se),
  latency.firstclick.l95ci = latency.firstclick - (1.96 * latency.firstclick.se)
)

# Dot-and-interval plot, terms ordered by mean latency
pdf("./figures/fig-latency.firstclick-filt.pdf", width = 7.5, height = 9.5)
ggplot(
  data.nonresponse,
  aes(x = latency.firstclick, y = reorder(term.eng, latency.firstclick), color = core)
) +
  xlab("Latency - First Click (seconds)") +
  ylab("Term") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5) +
  geom_segment(
    aes(
      y = reorder(term.eng, latency.firstclick),
      x = latency.firstclick.l95ci,
      xend = latency.firstclick.u95ci,
      yend = reorder(term.eng, latency.firstclick)
    ),
    alpha = .4,
    lwd = .8
  ) +
  theme(legend.position = "none") +
  scale_color_manual(
    values = c("dodgerblue3", "grey75"),
    guide = "none",
    aesthetics = c("fill", "color")
  )
dev.off()

# Figure SI5: Mean Latency to First Click for All Cue Words (Hong Kong) - Filtered
# Per-cue mean of latency.firstclick
data.hk.latency.firstclick <- summarise(
  group_by(data.hk.wat.filt, term.eng),
  latency.firstclick = mean(latency.firstclick, na.rm = TRUE)
)

# Per-cue standard deviation, computed separately from the mean
data.hk.latency.firstclick.sd <- summarise(
  group_by(data.hk.wat.filt, term.eng),
  latency.firstclick.sd = sd(latency.firstclick, na.rm = TRUE)
)

data.hk.latency.firstclick <- left_join(
  data.hk.latency.firstclick,
  data.hk.latency.firstclick.sd,
  by = "term.eng"
)

# Add the latency columns to the per-cue table; SE = sd / sqrt(trials),
# interval bounds = mean +/- 1.96 SE
data.hk.nonresponse <- mutate(
  left_join(data.hk.nonresponse, data.hk.latency.firstclick, by = "term.eng"),
  latency.firstclick.se = latency.firstclick.sd / sqrt(trials),
  latency.firstclick.u95ci = latency.firstclick + (1.96 * latency.firstclick.se),
  latency.firstclick.l95ci = latency.firstclick - (1.96 * latency.firstclick.se)
)

# Dot-and-interval plot, terms ordered by mean latency
pdf("./figures/fig-latency.firstclick-hk-filt.pdf", width = 7.5, height = 9.5)
ggplot(
  data.hk.nonresponse,
  aes(x = latency.firstclick, y = reorder(term.eng, latency.firstclick), color = core)
) +
  xlab("Latency - First Click (seconds)") +
  ylab("Term") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5) +
  geom_segment(
    aes(
      y = reorder(term.eng, latency.firstclick),
      x = latency.firstclick.l95ci,
      xend = latency.firstclick.u95ci,
      yend = reorder(term.eng, latency.firstclick)
    ),
    alpha = .4,
    lwd = .8
  ) +
  theme(legend.position = "none") +
  scale_color_manual(
    values = c("dodgerblue3", "grey75"),
    guide = "none",
    aesthetics = c("fill", "color")
  )
dev.off()

# Figure SI3: Mean Response Count for All Cue Words (Mainland China) - Filtered
# Per-cue mean of tokens.num
data.tokens.num <- summarise(
  group_by(data.wat.filt, term.eng),
  tokens.num = mean(tokens.num, na.rm = TRUE)
)

# Per-cue standard deviation, computed separately from the mean
data.tokens.num.sd <- summarise(
  group_by(data.wat.filt, term.eng),
  tokens.num.sd = sd(tokens.num, na.rm = TRUE)
)

data.tokens.num <- left_join(
  data.tokens.num,
  data.tokens.num.sd,
  by = "term.eng"
)

# Add the token columns to the per-cue table; SE = sd / sqrt(trials),
# interval bounds = mean +/- 1.96 SE
data.nonresponse <- mutate(
  left_join(data.nonresponse, data.tokens.num, by = "term.eng"),
  tokens.num.se = tokens.num.sd / sqrt(trials),
  tokens.num.u95ci = tokens.num + (1.96 * tokens.num.se),
  tokens.num.l95ci = tokens.num - (1.96 * tokens.num.se)
)

# Dot-and-interval plot, terms ordered by mean token count
pdf("./figures/fig-tokens.num-filt.pdf", width = 7.5, height = 9.5)
ggplot(
  data.nonresponse,
  aes(x = tokens.num, y = reorder(term.eng, tokens.num), color = core)
) +
  xlab("Count") +
  ylab("Term") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5) +
  geom_segment(
    aes(
      y = reorder(term.eng, tokens.num),
      x = tokens.num.l95ci,
      xend = tokens.num.u95ci,
      yend = reorder(term.eng, tokens.num)
    ),
    alpha = .4,
    lwd = .8
  ) +
  theme(legend.position = "none") +
  scale_color_manual(
    values = c("dodgerblue3", "grey75"),
    guide = "none",
    aesthetics = c("fill", "color")
  )
dev.off()

# Figure SI6: Mean Response Count for All Cue Words (Hong Kong) - Filtered
# Per-cue mean of tokens.num
data.hk.tokens.num <- summarise(
  group_by(data.hk.wat.filt, term.eng),
  tokens.num = mean(tokens.num, na.rm = TRUE)
)

# Per-cue standard deviation, computed separately from the mean
data.hk.tokens.num.sd <- summarise(
  group_by(data.hk.wat.filt, term.eng),
  tokens.num.sd = sd(tokens.num, na.rm = TRUE)
)

data.hk.tokens.num <- left_join(
  data.hk.tokens.num,
  data.hk.tokens.num.sd,
  by = "term.eng"
)

# Add the token columns to the per-cue table; SE = sd / sqrt(trials),
# interval bounds = mean +/- 1.96 SE
data.hk.nonresponse <- mutate(
  left_join(data.hk.nonresponse, data.hk.tokens.num, by = "term.eng"),
  tokens.num.se = tokens.num.sd / sqrt(trials),
  tokens.num.u95ci = tokens.num + (1.96 * tokens.num.se),
  tokens.num.l95ci = tokens.num - (1.96 * tokens.num.se)
)

# Dot-and-interval plot, terms ordered by mean token count
pdf("./figures/fig-tokens.num-hk-filt.pdf", width = 7.5, height = 9.5)
ggplot(
  data.hk.nonresponse,
  aes(x = tokens.num, y = reorder(term.eng, tokens.num), color = core)
) +
  xlab("Count") +
  ylab("Term") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5) +
  geom_segment(
    aes(
      y = reorder(term.eng, tokens.num),
      x = tokens.num.l95ci,
      xend = tokens.num.u95ci,
      yend = reorder(term.eng, tokens.num)
    ),
    alpha = .4,
    lwd = .8
  ) +
  theme(legend.position = "none") +
  scale_color_manual(
    values = c("dodgerblue3", "grey75"),
    guide = "none",
    aesthetics = c("fill", "color")
  )
dev.off()

# Figure: Non Response by Trial Order (Mainland China) - Filtered
# Mean nonresponse flag at each trial position
data.nonresponse.cueorder <- ungroup(
  summarise(
    group_by(data.wat.filt, cueorder),
    nonresponse = mean(nonresponse, na.rm = TRUE)
  )
)

p1 <- ggplot(data.nonresponse.cueorder, aes(x = cueorder, y = nonresponse)) +
  xlab("Trial Number") +
  ylab("Nonresponse Rate") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5, color = "grey50") +
  geom_line(alpha = .5, size = 1, color = "grey50") +
  theme(legend.position = "none") +
  ylim(c(0, .1)) +
  scale_x_continuous(breaks = seq(1, 18, 1)) +
  ggtitle("Nonresponse Rate") +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Figure: Tokens by Trial Order (Mainland China) - Filtered
# Mean token count at each trial position
data.tokens.cueorder <- ungroup(
  summarise(
    group_by(data.wat.filt, cueorder),
    tokens.num = mean(tokens.num, na.rm = TRUE)
  )
)

p2 <- ggplot(data.tokens.cueorder, aes(x = cueorder, y = tokens.num)) +
  xlab("Trial Number") +
  ylab("Count (mean)") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5, color = "grey50") +
  geom_line(alpha = .5, size = 1, color = "grey50") +
  theme(legend.position = "none") +
  ylim(c(1.8, 2.3)) +
  scale_x_continuous(breaks = seq(1, 18, 1)) +
  ggtitle("Tokens Provided") +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Average of the per-position means over rows 1-9 and rows 10-18
mean(data.tokens.cueorder[["tokens.num"]][1:9]) # 2.06 tokens
mean(data.tokens.cueorder[["tokens.num"]][10:18]) # 1.99 tokens

# Figure: Latency First Click by Trial Order (Mainland China) - Filtered
# Mean first-click latency at each trial position
data.latency.firstclick.cueorder <- ungroup(
  summarise(
    group_by(data.wat.filt, cueorder),
    latency.firstclick = mean(latency.firstclick, na.rm = TRUE)
  )
)

p3 <- ggplot(
  data.latency.firstclick.cueorder,
  aes(x = cueorder, y = latency.firstclick)
) +
  xlab("Trial Number") +
  ylab("Latency - First Click (mean)") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5, color = "grey50") +
  geom_line(alpha = .5, size = 1, color = "grey50") +
  theme(legend.position = "none") +
  ylim(c(1, 3)) +
  scale_x_continuous(breaks = seq(1, 18, 1)) +
  ggtitle("Time to First Click") +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Figure: Latency Submission by Trial Order (Mainland China) - Filtered
# Mean submission latency at each trial position
data.latency.submission.cueorder <- ungroup(
  summarise(
    group_by(data.wat.filt, cueorder),
    latency.submission = mean(latency.submission, na.rm = TRUE)
  )
)

p4 <- ggplot(
  data.latency.submission.cueorder,
  aes(x = cueorder, y = latency.submission)
) +
  xlab("Trial Number") +
  ylab("Latency - Submission (mean)") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5, color = "grey50") +
  geom_line(alpha = .5, size = 1, color = "grey50") +
  theme(legend.position = "none") +
  ylim(c(10, 15.25)) +
  scale_x_continuous(breaks = seq(1, 18, 1)) +
  ggtitle("Time to Submission") +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Figure 3: Performance Diagnostics by Trial Number (Mainland China) - Filtered
# The four panels above are laid out on a 2 x 2 grid in one PDF

pdf("./figures/fig-diagnostics-filt.pdf", width = 8.5, height = 8)
arrange_ggplot2(p1, p2, p3, p4, nrow = 2, ncol = 2)
dev.off()

# Figure: Non Response by Trial Order (Hong Kong) - Filtered
# Mean nonresponse flag at each trial position
data.hk.nonresponse.cueorder <- ungroup(
  summarise(
    group_by(data.hk.wat.filt, cueorder),
    nonresponse = mean(nonresponse, na.rm = TRUE)
  )
)

p5 <- ggplot(data.hk.nonresponse.cueorder, aes(x = cueorder, y = nonresponse)) +
  xlab("Trial Number") +
  ylab("Nonresponse Rate") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5, color = "grey50") +
  geom_line(alpha = .5, size = 1, color = "grey50") +
  theme(legend.position = "none") +
  ylim(c(0, .1)) +
  scale_x_continuous(breaks = seq(1, 18, 1)) +
  ggtitle("Nonresponse Rate") +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Figure: Tokens by Trial Order (Hong Kong) - Filtered
# Mean token count at each trial position
data.hk.tokens.cueorder <- ungroup(
  summarise(
    group_by(data.hk.wat.filt, cueorder),
    tokens.num = mean(tokens.num, na.rm = TRUE)
  )
)

p6 <- ggplot(data.hk.tokens.cueorder, aes(x = cueorder, y = tokens.num)) +
  xlab("Trial Number") +
  ylab("Count (mean)") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5, color = "grey50") +
  geom_line(alpha = .5, size = 1, color = "grey50") +
  theme(legend.position = "none") +
  ylim(c(1.5, 2.0)) +
  scale_x_continuous(breaks = seq(1, 18, 1)) +
  ggtitle("Tokens Provided") +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Figure: Latency First Click by Trial Order (Hong Kong) - Filtered
# Mean first-click latency at each trial position
data.hk.latency.firstclick.cueorder <- ungroup(
  summarise(
    group_by(data.hk.wat.filt, cueorder),
    latency.firstclick = mean(latency.firstclick, na.rm = TRUE)
  )
)

p7 <- ggplot(
  data.hk.latency.firstclick.cueorder,
  aes(x = cueorder, y = latency.firstclick)
) +
  xlab("Trial Number") +
  ylab("Latency - First Click (mean)") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5, color = "grey50") +
  geom_line(alpha = .5, size = 1, color = "grey50") +
  theme(legend.position = "none") +
  ylim(c(1, 3)) +
  scale_x_continuous(breaks = seq(1, 18, 1)) +
  ggtitle("Time to First Click") +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Figure: Latency Submission by Trial Order (Hong Kong) - Filtered
# Mean submission latency at each trial position
data.hk.latency.submission.cueorder <- ungroup(
  summarise(
    group_by(data.hk.wat.filt, cueorder),
    latency.submission = mean(latency.submission, na.rm = TRUE)
  )
)

p8 <- ggplot(
  data.hk.latency.submission.cueorder,
  aes(x = cueorder, y = latency.submission)
) +
  xlab("Trial Number") +
  ylab("Latency - Submission (mean)") +
  theme(plot.title = element_text(lineheight = .8, face = "bold")) +
  scale_shape(solid = TRUE) +
  theme_classic() +
  theme(legend.title = element_blank()) +
  geom_point(alpha = .5, size = 2.5, color = "grey50") +
  geom_line(alpha = .5, size = 1, color = "grey50") +
  theme(legend.position = "none") +
  ylim(c(10, 15.5)) +
  scale_x_continuous(breaks = seq(1, 18, 1)) +
  ggtitle("Time to Submission") +
  theme(
    plot.title = element_text(size = 9),
    axis.title = element_text(size = 9)
  )

# Figure SI1: Performance Diagnostics by Trial Number (Hong Kong) - Filtered
# The four panels above are laid out on a 2 x 2 grid in one PDF

pdf("./figures/fig-diagnostics-hk-filt.pdf", width = 8.5, height = 8)
arrange_ggplot2(p5, p6, p7, p8, nrow = 2, ncol = 2)
dev.off()

#Regression Analysis: Determinants of Response Patterns (Mainland China)
# Three OLS models fitted on the filtered trial-level data, one per outcome
# (nonresponse, tokens.num, latency.firstclick). Each regresses the outcome
# on the core-cue indicator, the respondent covariates listed below, and one
# dummy per respondent via as.factor(ResponseID).
m1.1 <- lm(nonresponse ~ core + female + age + lowed + res.village + minority 
           + ccp + hukou.agr + pol.int + gov.resp + pol.know 
           + sat.central + sat.npc + sat.local + as.factor(ResponseID), data = data.wat.filt)
m1.2 <- lm(tokens.num ~ core + female + age + lowed + res.village + minority 
           + ccp + hukou.agr + pol.int + gov.resp + pol.know 
           + sat.central + sat.npc + sat.local + as.factor(ResponseID), data = data.wat.filt)
m1.3 <- lm(latency.firstclick ~ core + female + age + lowed + res.village + minority 
           + ccp + hukou.agr + pol.int + gov.resp + pol.know 
           + sat.central + sat.npc + sat.local + as.factor(ResponseID), data = data.wat.filt)

# Coefficient tests using the HC1 heteroskedasticity-consistent covariance
# matrix from sandwich::vcovHC
r1.1 <- coeftest(m1.1, vcov = vcovHC(m1.1, type = "HC1"))
r1.2 <- coeftest(m1.2, vcov = vcovHC(m1.2, type = "HC1"))
r1.3 <- coeftest(m1.3, vcov = vcovHC(m1.3, type = "HC1"))

# Fourth column of each coeftest result; handed to stargazer below as the
# p-values to report in place of the default lm ones
p1.1 <- r1.1[, 4]
p1.2 <- r1.2[, 4]
p1.3 <- r1.3[, 4]

# Table 1
# Only coefficients matching "core" are kept in the printed table; the
# reported p-values are the HC1-based ones supplied through `p`.
stargazer(m1.1, m1.2, m1.3,
          p = list(p1.1, p1.2, p1.3), 
          omit.stat = c("LL","ser","f"),
          report=("vc*p"),
          dep.var.labels.include = FALSE,
          keep=c("core"),
          covariate.labels = c("Political Core Cue"),
          column.labels = c("Nonresponse Rate", "Token", "Latency to First Click"))

#Regression Analysis: Determinants of Response Patterns (Hong Kong)
# Three OLS models fitted on the filtered Hong Kong trial-level data, one per
# outcome (nonresponse, tokens.num, latency.firstclick). Each regresses the
# outcome on the core-cue indicator, the respondent covariates listed below,
# and one dummy per respondent via as.factor(ResponseID).
m2.1 <- lm(nonresponse ~ core + female + age + lowed + pol.int 
           + gov.resp + pol.know + sat.hk.pol + sat.hklc + sat.hkce 
           + trust.ccp + poli.orien + hk.indep + as.factor(ResponseID), data = data.hk.wat.filt)
m2.2 <- lm(tokens.num ~ core + female + age + lowed + pol.int 
           + gov.resp + pol.know + sat.hk.pol + sat.hklc + sat.hkce 
           + trust.ccp + poli.orien + hk.indep + as.factor(ResponseID), data = data.hk.wat.filt)
m2.3 <- lm(latency.firstclick ~ core + female + age + lowed + pol.int 
           + gov.resp + pol.know + sat.hk.pol + sat.hklc + sat.hkce 
           + trust.ccp + poli.orien + hk.indep + as.factor(ResponseID), data = data.hk.wat.filt)

# Coefficient tests using the HC1 heteroskedasticity-consistent covariance
# matrix from sandwich::vcovHC
r2.1 <- coeftest(m2.1, vcov = vcovHC(m2.1, type = "HC1"))
r2.2 <- coeftest(m2.2, vcov = vcovHC(m2.2, type = "HC1"))
r2.3 <- coeftest(m2.3, vcov = vcovHC(m2.3, type = "HC1"))

# Fourth column of each coeftest result; handed to stargazer below as the
# p-values to report in place of the default lm ones
p2.1 <- r2.1[, 4]
p2.2 <- r2.2[, 4]
p2.3 <- r2.3[, 4]

# Table SI5
# Only coefficients matching "core" are kept in the printed table; the
# reported p-values are the HC1-based ones supplied through `p`.
stargazer(m2.1, m2.2, m2.3,
          p = list(p2.1, p2.2, p2.3), 
          omit.stat = c("LL","ser","f"),
          report=("vc*p"),
          dep.var.labels.include = FALSE,
          keep=c("core"),
          covariate.labels = c("Political Core Cue"),
          column.labels = c("Nonresponse Rate", "Token", "Latency to First Click"))

# Save the filtered trial-level datasets without a row-name column.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
write.csv(data.wat.filt, "./data/data.wat.filt.csv", row.names = FALSE)
write.csv(data.hk.wat.filt, "./data/data.hk.wat.filt.csv", row.names = FALSE)
